Chelicerate GRAMPA

knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

library(ggplot2)
library(cowplot)
#library(ggbeeswarm)
library(dplyr)
library(kableExtra)
#library(tidyr)
library(ggtree)
#library(phytools)
#library(phangorn)
#library(reshape2)
library(ggExtra)
#library(ggrepel)
#library(vroom)
#library(ggdist)
#library(stringr)
#library(ggsignif)
library(here)
source("../lib/design.r")
source("../lib/get_tree_info.r")
#source("C:/Users/grt814/bin/core/corelib/design.r")
#source("C:/Users/grt814/bin/core/get_tree_info.r")

#htmltools::includeHTML("../html-chunks/rmd_nav.html")

< Back to project

save_tree_fig = F
# Meta options. Comment tree_type when running form generator

cds_apro_full_file = here("data", "grampa", "cds-apro-full-grampa.txt")
cds_apro_spider_file = here("data", "grampa", "cds-apro-h1-5-grampa.txt")
cds_apro_hc_file = here("data", "grampa", "cds-apro-h1-7-grampa.txt")
cds_apro_hcs_file = here("data", "grampa", "cds-apro-h1-5-7-grampa.txt")
ball_full_file = here("data", "grampa", "ballesteros-full-grampa.txt")
trad_full_file = here("data", "grampa", "traditional-full-grampa.txt")

cds_apro_r70_full_file = here("data", "grampa", "cds-apro-r70-full-grampa.txt")
cds_apro_r70_spider_file = here("data", "grampa", "cds-apro-r70-h1-5-grampa.txt")
cds_apro_r70_hc_file = here("data", "grampa", "cds-apro-r70-h1-7-grampa.txt")
cds_apro_r70_hcs_file = here("data", "grampa", "cds-apro-r70-h1-5-7-grampa.txt")
ball_r70_full_file = here("data", "grampa", "ballesteros-r70-full-grampa.txt")
trad_r70_full_file = here("data", "grampa", "traditional-r70-full-grampa.txt")

cds_apro_r90_full_file = here("data", "grampa", "cds-apro-r90-full-grampa.txt")
cds_apro_r90_spider_file = here("data", "grampa", "cds-apro-r90-h1-5-grampa.txt")
cds_apro_r90_hc_file = here("data", "grampa", "cds-apro-r90-h1-7-grampa.txt")
cds_apro_r90_hcs_file = here("data", "grampa", "cds-apro-r90-h1-5-7-grampa.txt")
ball_r90_full_file = here("data", "grampa", "ballesteros-r90-full-grampa.txt")
trad_r90_full_file = here("data", "grampa", "traditional-r90-full-grampa.txt")

genome_file = here("data", "chelicerate_genomes-gwct.csv")

summary_file = here("data", "gene-trees.csv")

genome_data = read.csv(genome_file, header=T)
genome_data = subset(genome_data, Include=="Y")

spec_abbrs = select(genome_data, S.name, Abbr)
names(spec_abbrs)[2] = "label"

combined_scores = data.frame()

All searches done on 5680 rooted gene trees.

1 ASTRAL-pro species tree

1.2 Full search, bootstrap rearrange 70

1.2.1 Input species tree and score distribution

cur_data = read.csv(cds_apro_r70_full_file, comment.char="#", header=T, sep="\t")
# Read the GRAMPA results

cur_data$Tree.string = paste(cur_data$Tree.string, ";", sep="")
# Add the semi-colons to the trees so they can be read by R

cur_data = cur_data[order(cur_data$Total.score), ]
# Re-order the rows by score

cur_data$rank = seq_along(cur_data[,1])
# Add a column for rank (just row number in re-ordered rows)

#cds_apro_r70_full$rank = as.numeric(as.character(cds_apro_r70_full$rank))
# Convert ranks to numeric

####################

st_string = cur_data$Tree.string[cur_data$Tree=="ST"]
# Extract the species tree string

st = read.tree(text=st_string)
# Read the species tree

famcol = corecol(pal="wilke", numcol=1, offset=2)
famoffset = 4
# Order labels

cur_st_fig = ggtree(st, size=0.8, ladderize=T) +#, aes(color=full_data$Family)) +
  xlim(0, 20) +
  geom_tiplab(size=4) +
  #scale_color_manual(name='Include?', values=c("N"=corecol(pal="wilke", numcol=1,offset=1), "Y"=corecol(numcol=1, offset=1))) +
  #geom_text(aes(label=node), hjust=-.3, vjust=-.3, color="#ff6db6") +
  
  geom_cladelabel(node=22, label="Araneae", align=T, color=corecol(numcol=1, offset=2), offset=famoffset) +
  geom_cladelabel(node=7, label="Scorpiones", align=T, color=corecol(numcol=1), offset=famoffset) +
  geom_cladelabel(node=27, label="Xiphosura", align=T, color=corecol(numcol=1, offset=4), offset=famoffset) +
  geom_cladelabel(node=28, label="Acariformes", align=T, color=corecol(numcol=1, offset=7), offset=famoffset) +
  geom_cladelabel(node=29, label="Parasitiformes", align=T, color=corecol(numcol=1, offset=1, pal="trek"), offset=famoffset) +
  geom_cladelabel(node=15, label="Diptera", align=T, color="#333333", offset=famoffset) +
  geom_cladelabel(node=16, label="Lepidoptera", align=T, color="#333333", offset=famoffset) +
  ggtitle(paste("CDS A-pro R70 full search tree\n(", nrow(cur_data), " trees)", sep=""))

#print(cur_st_fig)
# Plot tree

####################

cur_scores_fig = ggplot(cur_data, aes(x=rank, y=Total.score)) +
  geom_point(size=2, color=corecol(pal="wilke", numcol=1), alpha=0.33) +
  scale_x_continuous(limits=c(-10,max(cur_data$rank)+10)) +
  xlab("Rank") +
  ylab("GRAMPA score") +
  bartheme()
#print(cds_apro_r70_full_scores_fig)
# Plot scores

####################

cur_figs = plot_grid(cur_st_fig, cur_scores_fig, rel_widths=c(0.8,1))
print(cur_figs)

# Combine tree and score figs

1.2.2 Lowest scoring trees

cur_data_lowest = head(cur_data, 6)
# Get the lowest scoring trees

tree_fig_list = list()
# A list to store tree figures

for(i in 1:nrow(cur_data_lowest)){
  tree = read.tree(text=cur_data_lowest[i,]$Tree.string)
  # Read the current tree
  
  tree_fig = ggtree(tree, size=0.5, ladderize=T) +
  xlim(0, 14) +
    geom_tiplab(size=2.5) +
  ggtitle(paste(cur_data_lowest[i,]$Tree, " (", cur_data_lowest[i,]$Total.score, ")", sep=""))
  # Plot the current tree
  
  tree_fig_list[[i]] = tree_fig
  # Add the tree to the tree list
}

tree_figs = plot_grid(plotlist=tree_fig_list, ncol=3)
print(tree_figs)

# Combine and render figs

cur_data_lowest$run = "CDS A-pro R70 full"
combined_scores = rbind(combined_scores, cur_data_lowest)

1.3 Full search, bootstrap rearrange 90

1.3.1 Input species tree and score distribution

cur_data = read.csv(cds_apro_r90_full_file, comment.char="#", header=T, sep="\t")
# Read the GRAMPA results

cur_data$Tree.string = paste(cur_data$Tree.string, ";", sep="")
# Add the semi-colons to the trees so they can be read by R

cur_data = cur_data[order(cur_data$Total.score), ]
# Re-order the rows by score

cur_data$rank = seq_along(cur_data[,1])
# Add a column for rank (just row number in re-ordered rows)

#cds_apro_r70_full$rank = as.numeric(as.character(cds_apro_r70_full$rank))
# Convert ranks to numeric

####################

st_string = cur_data$Tree.string[cur_data$Tree=="ST"]
# Extract the species tree string

st = read.tree(text=st_string)
# Read the species tree

famcol = corecol(pal="wilke", numcol=1, offset=2)
famoffset = 4
# Order labels

cur_st_fig = ggtree(st, size=0.8, ladderize=T) +#, aes(color=full_data$Family)) +
  xlim(0, 20) +
  geom_tiplab(size=4) +
  #scale_color_manual(name='Include?', values=c("N"=corecol(pal="wilke", numcol=1,offset=1), "Y"=corecol(numcol=1, offset=1))) +
  #geom_text(aes(label=node), hjust=-.3, vjust=-.3, color="#ff6db6") +
  
  geom_cladelabel(node=22, label="Araneae", align=T, color=corecol(numcol=1, offset=2), offset=famoffset) +
  geom_cladelabel(node=7, label="Scorpiones", align=T, color=corecol(numcol=1), offset=famoffset) +
  geom_cladelabel(node=27, label="Xiphosura", align=T, color=corecol(numcol=1, offset=4), offset=famoffset) +
  geom_cladelabel(node=28, label="Acariformes", align=T, color=corecol(numcol=1, offset=7), offset=famoffset) +
  geom_cladelabel(node=29, label="Parasitiformes", align=T, color=corecol(numcol=1, offset=1, pal="trek"), offset=famoffset) +
  geom_cladelabel(node=15, label="Diptera", align=T, color="#333333", offset=famoffset) +
  geom_cladelabel(node=16, label="Lepidoptera", align=T, color="#333333", offset=famoffset) +
  ggtitle(paste("CDS A-pro R90 full search tree\n(", nrow(cur_data), " trees)", sep=""))

#print(cur_st_fig)
# Plot tree

####################

cur_scores_fig = ggplot(cur_data, aes(x=rank, y=Total.score)) +
  geom_point(size=2, color=corecol(pal="wilke", numcol=1), alpha=0.33) +
  scale_x_continuous(limits=c(-10,max(cur_data$rank)+10)) +
  xlab("Rank") +
  ylab("GRAMPA score") +
  bartheme()
#print(cds_apro_r70_full_scores_fig)
# Plot scores

####################

cur_figs = plot_grid(cur_st_fig, cur_scores_fig, rel_widths=c(0.8,1))
print(cur_figs)

# Combine tree and score figs

1.3.2 Lowest scoring trees

cur_data_lowest = head(cur_data, 6)
# Get the lowest scoring trees

tree_fig_list = list()
# A list to store tree figures

for(i in 1:nrow(cur_data_lowest)){
  tree = read.tree(text=cur_data_lowest[i,]$Tree.string)
  # Read the current tree
  
  tree_fig = ggtree(tree, size=0.5, ladderize=T) +
  xlim(0, 14) +
    geom_tiplab(size=2.5) +
  ggtitle(paste(cur_data_lowest[i,]$Tree, " (", cur_data_lowest[i,]$Total.score, ")", sep=""))
  # Plot the current tree
  
  tree_fig_list[[i]] = tree_fig
  # Add the tree to the tree list
}

tree_figs = plot_grid(plotlist=tree_fig_list, ncol=3)
print(tree_figs)

# Combine and render figs

cur_data_lowest$run = "CDS A-pro R90 full"
combined_scores = rbind(combined_scores, cur_data_lowest)

1.5 H1 spider (5) and horseshoe crab (7) search, bootstrap rearrange 70

1.5.1 Input species tree and score distribution

cur_data = read.csv(cds_apro_r70_hcs_file, comment.char="#", header=T, sep="\t")
# Read the GRAMPA results

cur_data$Tree.string = paste(cur_data$Tree.string, ";", sep="")
# Add the semi-colons to the trees so they can be read by R

cur_data = cur_data[order(cur_data$Total.score), ]
# Re-order the rows by score

cur_data$rank = seq_along(cur_data[,1])
# Add a column for rank (just row number in re-ordered rows)

#cds_apro_r70_full$rank = as.numeric(as.character(cds_apro_r70_full$rank))
# Convert ranks to numeric

####################

st_string = cur_data$Tree.string[cur_data$Tree=="ST"]
# Extract the species tree string

st = read.tree(text=st_string)
# Read the species tree

famcol = corecol(pal="wilke", numcol=1, offset=2)
famoffset = 4
# Order labels

cur_st_fig = ggtree(st, size=0.8, ladderize=T) +
  xlim(0, 20) +
  geom_tiplab(size=4) +
  #geom_text(aes(label=node), hjust=-.3, vjust=-.3, color="#ff6db6") +
  
  geom_point2(aes(subset=node==22), color=corecol(numcol=1, offset=2), size=5) +
  geom_point2(aes(subset=node==27), color=corecol(numcol=1, offset=2), size=5) +
  
  geom_cladelabel(node=22, label="Araneae", align=T, color=corecol(numcol=1, offset=2), offset=famoffset) +
  geom_cladelabel(node=7, label="Scorpiones", align=T, color=corecol(numcol=1), offset=famoffset) +
  geom_cladelabel(node=27, label="Xiphosura", align=T, color=corecol(numcol=1, offset=4), offset=famoffset) +
  geom_cladelabel(node=28, label="Acariformes", align=T, color=corecol(numcol=1, offset=7), offset=famoffset) +
  geom_cladelabel(node=29, label="Parasitiformes", align=T, color=corecol(numcol=1, offset=1, pal="trek"), offset=famoffset) +
  geom_cladelabel(node=15, label="Diptera", align=T, color="#333333", offset=famoffset) +
  geom_cladelabel(node=16, label="Lepidoptera", align=T, color="#333333", offset=famoffset) +
  
  ggtitle(paste("CDS A-pro R70 H1 spider+HC search tree\n(", nrow(cur_data), " trees)", sep=""))

#print(cur_st_fig)
# Plot tree

####################

cur_scores_fig = ggplot(cur_data, aes(x=rank, y=Total.score)) +
  geom_point(size=2, color=corecol(pal="wilke", numcol=1), alpha=0.33) +
  scale_x_continuous(limits=c(-10,max(cur_data$rank)+10)) +
  xlab("Rank") +
  ylab("GRAMPA score") +
  bartheme()
#print(cds_apro_r70_full_scores_fig)
# Plot scores

####################

cur_figs = plot_grid(cur_st_fig, cur_scores_fig, rel_widths=c(0.8,1))
print(cur_figs)

# Combine tree and score figs

1.5.2 Lowest scoring trees

cur_data_lowest = head(cur_data, 6)
# Get the lowest scoring trees

tree_fig_list = list()
# A list to store tree figures

for(i in 1:nrow(cur_data_lowest)){
  tree = read.tree(text=cur_data_lowest[i,]$Tree.string)
  # Read the current tree
  
  tree_fig = ggtree(tree, size=0.5, ladderize=T) +
  xlim(0, 14) +
    geom_tiplab(size=2.5) +
  ggtitle(paste(cur_data_lowest[i,]$Tree, " (", cur_data_lowest[i,]$Total.score, ")", sep=""))
  # Plot the current tree
  
  tree_fig_list[[i]] = tree_fig
  # Add the tree to the tree list
}

tree_figs = plot_grid(plotlist=tree_fig_list, ncol=3)
print(tree_figs)

# Combine and render figs

# cur_data_lowest$run = "CDS A-pro R70 full"
# combined_scores = rbind(combined_scores, cur_data_lowest)

#############################################################

1.6 H1 spider (5) and horseshoe crab (7) search, bootstrap rearrange 90

1.6.1 Input species tree and score distribution

cur_data = read.csv(cds_apro_r90_hcs_file, comment.char="#", header=T, sep="\t")
# Read the GRAMPA results

cur_data$Tree.string = paste(cur_data$Tree.string, ";", sep="")
# Add the semi-colons to the trees so they can be read by R

cur_data = cur_data[order(cur_data$Total.score), ]
# Re-order the rows by score

cur_data$rank = seq_along(cur_data[,1])
# Add a column for rank (just row number in re-ordered rows)

#cds_apro_r70_full$rank = as.numeric(as.character(cds_apro_r70_full$rank))
# Convert ranks to numeric

####################

st_string = cur_data$Tree.string[cur_data$Tree=="ST"]
# Extract the species tree string

st = read.tree(text=st_string)
# Read the species tree

famcol = corecol(pal="wilke", numcol=1, offset=2)
famoffset = 4
# Order labels

cur_st_fig = ggtree(st, size=0.8, ladderize=T) +
  xlim(0, 20) +
  geom_tiplab(size=4) +
  #geom_text(aes(label=node), hjust=-.3, vjust=-.3, color="#ff6db6") +
  
  geom_point2(aes(subset=node==22), color=corecol(numcol=1, offset=2), size=5) +
  geom_point2(aes(subset=node==27), color=corecol(numcol=1, offset=2), size=5) +
  
  geom_cladelabel(node=22, label="Araneae", align=T, color=corecol(numcol=1, offset=2), offset=famoffset) +
  geom_cladelabel(node=7, label="Scorpiones", align=T, color=corecol(numcol=1), offset=famoffset) +
  geom_cladelabel(node=27, label="Xiphosura", align=T, color=corecol(numcol=1, offset=4), offset=famoffset) +
  geom_cladelabel(node=28, label="Acariformes", align=T, color=corecol(numcol=1, offset=7), offset=famoffset) +
  geom_cladelabel(node=29, label="Parasitiformes", align=T, color=corecol(numcol=1, offset=1, pal="trek"), offset=famoffset) +
  geom_cladelabel(node=15, label="Diptera", align=T, color="#333333", offset=famoffset) +
  geom_cladelabel(node=16, label="Lepidoptera", align=T, color="#333333", offset=famoffset) +
  
  ggtitle(paste("CDS A-pro R90 H1 spider+HC search tree\n(", nrow(cur_data), " trees)", sep=""))

#print(cur_st_fig)
# Plot tree

####################

cur_scores_fig = ggplot(cur_data, aes(x=rank, y=Total.score)) +
  geom_point(size=2, color=corecol(pal="wilke", numcol=1), alpha=0.33) +
  scale_x_continuous(limits=c(-10,max(cur_data$rank)+10)) +
  xlab("Rank") +
  ylab("GRAMPA score") +
  bartheme()
#print(cds_apro_r70_full_scores_fig)
# Plot scores

####################

cur_figs = plot_grid(cur_st_fig, cur_scores_fig, rel_widths=c(0.8,1))
print(cur_figs)

# Combine tree and score figs

1.6.2 Lowest scoring trees

cur_data_lowest = head(cur_data, 6)
# Get the lowest scoring trees

tree_fig_list = list()
# A list to store tree figures

for(i in 1:nrow(cur_data_lowest)){
  tree = read.tree(text=cur_data_lowest[i,]$Tree.string)
  # Read the current tree
  
  tree_fig = ggtree(tree, size=0.5, ladderize=T) +
  xlim(0, 14) +
    geom_tiplab(size=2.5) +
  ggtitle(paste(cur_data_lowest[i,]$Tree, " (", cur_data_lowest[i,]$Total.score, ")", sep=""))
  # Plot the current tree
  
  tree_fig_list[[i]] = tree_fig
  # Add the tree to the tree list
}

tree_figs = plot_grid(plotlist=tree_fig_list, ncol=3)
print(tree_figs)

# Combine and render figs

# cur_data_lowest$run = "CDS A-pro R70 full"
# combined_scores = rbind(combined_scores, cur_data_lowest)

#############################################################
#############################################################

## H1 spider (5) search

### Input species tree and score distribution

cur_data = read.csv(cds_apro_spider_file, comment.char="#", header=T, sep="\t")
# Read the GRAMPA results

cur_data$Tree.string = paste(cur_data$Tree.string, ";", sep="")
# Add the semi-colons to the trees so they can be read by R

cur_data = cur_data[order(cur_data$Total.score), ]
# Re-order the rows by score

cur_data$rank = seq_along(cur_data[,1])
# Add a column for rank (just row number in re-ordered rows)

#cds_apro_r70_full$rank = as.numeric(as.character(cds_apro_r70_full$rank))
# Convert ranks to numeric

####################

st_string = cur_data$Tree.string[cur_data$Tree=="ST"]
# Extract the species tree string

st = read.tree(text=st_string)
# Read the species tree

famcol = corecol(pal="wilke", numcol=1, offset=2)
famoffset =6
# Order labels

cur_st_fig = ggtree(st, size=0.8, ladderize=T) +
  xlim(0, 14) +
  geom_tiplab(size=4) +
  #geom_text(aes(label=node), hjust=-.3, vjust=-.3, color="#ff6db6") +
  geom_point2(aes(subset=node==22), color=corecol(numcol=1, offset=2), size=5) +
  ggtitle(paste("CDS A-pro H1 spider search tree\n(", nrow(cur_data), " trees)", sep=""))

print(cur_st_fig)
# Plot tree

####################

cur_scores_fig = ggplot(cur_data, aes(x=rank, y=Total.score)) +
  geom_point(size=2, color=corecol(pal="wilke", numcol=1), alpha=0.33) +
  scale_x_continuous(limits=c(-1,max(cur_data$rank)+1)) +
  xlab("Rank") +
  ylab("GRAMPA score") +
  bartheme()
#print(cds_apro_r70_full_scores_fig)
# Plot scores

####################

cur_figs = plot_grid(cur_st_fig, cur_scores_fig, rel_widths=c(0.8,1))
print(cur_figs)
# Combine tree and score figs
### Lowest scoring trees

cur_data_lowest = head(cur_data, 6)
# Get the lowest scoring trees

tree_fig_list = list()
# A list to store tree figures

for(i in 1:nrow(cur_data_lowest)){
  tree = read.tree(text=cur_data_lowest[i,]$Tree.string)
  # Read the current tree
  
  tree_fig = ggtree(tree, size=0.5, ladderize=T) +
  xlim(0, 14) +
    geom_tiplab(size=2.5) +
  ggtitle(paste(cur_data_lowest[i,]$Tree, " (", cur_data_lowest[i,]$Total.score, ")", sep=""))
  # Plot the current tree
  
  tree_fig_list[[i]] = tree_fig
  # Add the tree to the tree list
}

tree_figs = plot_grid(plotlist=tree_fig_list, ncol=3)
print(tree_figs)
# Combine and render figs

# cur_data_lowest$run = "CDS A-pro spider"
# combined_scores = rbind(combined_scores, cur_data_lowest)
## H1 spider (5) search, bootstrap rearrange 70

### Input species tree and score distribution

cur_data = read.csv(cds_apro_r70_spider_file, comment.char="#", header=T, sep="\t")
# Read the GRAMPA results

cur_data$Tree.string = paste(cur_data$Tree.string, ";", sep="")
# Add the semi-colons to the trees so they can be read by R

cur_data = cur_data[order(cur_data$Total.score), ]
# Re-order the rows by score

cur_data$rank = seq_along(cur_data[,1])
# Add a column for rank (just row number in re-ordered rows)

#cds_apro_r70_full$rank = as.numeric(as.character(cds_apro_r70_full$rank))
# Convert ranks to numeric

####################

st_string = cur_data$Tree.string[cur_data$Tree=="ST"]
# Extract the species tree string

st = read.tree(text=st_string)
# Read the species tree

famcol = corecol(pal="wilke", numcol=1, offset=2)
famoffset =6
# Order labels

cur_st_fig = ggtree(st, size=0.8, ladderize=T) +
  xlim(0, 14) +
  geom_tiplab(size=4) +
  #geom_text(aes(label=node), hjust=-.3, vjust=-.3, color="#ff6db6") +
  geom_point2(aes(subset=node==22), color=corecol(numcol=1, offset=2), size=5) +
  ggtitle(paste("CDS A-pro R70 H1 spider search tree\n(", nrow(cur_data), " trees)", sep=""))

#print(cur_st_fig)
# Plot tree

####################

cur_scores_fig = ggplot(cur_data, aes(x=rank, y=Total.score)) +
  geom_point(size=2, color=corecol(pal="wilke", numcol=1), alpha=0.33) +
  scale_x_continuous(limits=c(-10,max(cur_data$rank)+10)) +
  xlab("Rank") +
  ylab("GRAMPA score") +
  bartheme()
#print(cds_apro_r70_full_scores_fig)
# Plot scores

####################

cur_figs = plot_grid(cur_st_fig, cur_scores_fig, rel_widths=c(0.8,1))
print(cur_figs)
# Combine tree and score figs
### Lowest scoring trees

cur_data_lowest = head(cur_data, 6)
# Get the lowest scoring trees

tree_fig_list = list()
# A list to store tree figures

for(i in 1:nrow(cur_data_lowest)){
  tree = read.tree(text=cur_data_lowest[i,]$Tree.string)
  # Read the current tree
  
  tree_fig = ggtree(tree, size=0.5, ladderize=T) +
  xlim(0, 14) +
    geom_tiplab(size=2.5) +
  ggtitle(paste(cur_data_lowest[i,]$Tree, " (", cur_data_lowest[i,]$Total.score, ")", sep=""))
  # Plot the current tree
  
  tree_fig_list[[i]] = tree_fig
  # Add the tree to the tree list
}

tree_figs = plot_grid(plotlist=tree_fig_list, ncol=3)
print(tree_figs)
# Combine and render figs

# cur_data_lowest$run = "CDS A-pro R70 full"
# combined_scores = rbind(combined_scores, cur_data_lowest)
## H1 horseshoe crab (7) search

### Input species tree and score distribution

cur_data = read.csv(cds_apro_hc_file, comment.char="#", header=T, sep="\t")
# Read the GRAMPA results

cur_data$Tree.string = paste(cur_data$Tree.string, ";", sep="")
# Add the semi-colons to the trees so they can be read by R

cur_data = cur_data[order(cur_data$Total.score), ]
# Re-order the rows by score

cur_data$rank = seq_along(cur_data[,1])
# Add a column for rank (just row number in re-ordered rows)

#cds_apro_r70_full$rank = as.numeric(as.character(cds_apro_r70_full$rank))
# Convert ranks to numeric

####################

st_string = cur_data$Tree.string[cur_data$Tree=="ST"]
# Extract the species tree string

st = read.tree(text=st_string)
# Read the species tree

famcol = corecol(pal="wilke", numcol=1, offset=2)
famoffset =6
# Order labels

cur_st_fig = ggtree(st, size=0.8, ladderize=T) +
  xlim(0, 14) +
  geom_tiplab(size=4) +
  #geom_text(aes(label=node), hjust=-.3, vjust=-.3, color="#ff6db6") +
  geom_point2(aes(subset=node==27), color=corecol(numcol=1, offset=2), size=5) +
  ggtitle(paste("CDS A-pro H1 horseshoe crab search tree\n(", nrow(cur_data), " trees)", sep=""))

#print(cur_st_fig)
# Plot tree

####################

cur_scores_fig = ggplot(cur_data, aes(x=rank, y=Total.score)) +
  geom_point(size=2, color=corecol(pal="wilke", numcol=1), alpha=0.33) +
  scale_x_continuous(limits=c(-1,max(cur_data$rank)+1)) +
  xlab("Rank") +
  ylab("GRAMPA score") +
  bartheme()
#print(cds_apro_r70_full_scores_fig)
# Plot scores

####################

cur_figs = plot_grid(cur_st_fig, cur_scores_fig, rel_widths=c(0.8,1))
print(cur_figs)
# Combine tree and score figs
### Lowest scoring trees

cur_data_lowest = head(cur_data, 6)
# Get the lowest scoring trees

tree_fig_list = list()
# A list to store tree figures

for(i in 1:nrow(cur_data_lowest)){
  tree = read.tree(text=cur_data_lowest[i,]$Tree.string)
  # Read the current tree
  
  tree_fig = ggtree(tree, size=0.5, ladderize=T) +
  xlim(0, 14) +
    geom_tiplab(size=2.5) +
  ggtitle(paste(cur_data_lowest[i,]$Tree, " (", cur_data_lowest[i,]$Total.score, ")", sep=""))
  # Plot the current tree
  
  tree_fig_list[[i]] = tree_fig
  # Add the tree to the tree list
}

tree_figs = plot_grid(plotlist=tree_fig_list, ncol=3)
print(tree_figs)
# Combine and render figs

# cur_data_lowest$run = "CDS A-pro spider"
# combined_scores = rbind(combined_scores, cur_data_lowest)
## H1 horseshoe crab (7) search, bootstrap rearrange 70

### Input species tree and score distribution

cur_data = read.csv(cds_apro_r70_hc_file, comment.char="#", header=T, sep="\t")
# Read the GRAMPA results

cur_data$Tree.string = paste(cur_data$Tree.string, ";", sep="")
# Add the semi-colons to the trees so they can be read by R

cur_data = cur_data[order(cur_data$Total.score), ]
# Re-order the rows by score

cur_data$rank = seq_along(cur_data[,1])
# Add a column for rank (just row number in re-ordered rows)

#cds_apro_r70_full$rank = as.numeric(as.character(cds_apro_r70_full$rank))
# Convert ranks to numeric

####################

st_string = cur_data$Tree.string[cur_data$Tree=="ST"]
# Extract the species tree string

st = read.tree(text=st_string)
# Read the species tree

famcol = corecol(pal="wilke", numcol=1, offset=2)
famoffset =6
# Order labels

cur_st_fig = ggtree(st, size=0.8, ladderize=T) +
  xlim(0, 14) +
  geom_tiplab(size=4) +
  #geom_text(aes(label=node), hjust=-.3, vjust=-.3, color="#ff6db6") +
  geom_point2(aes(subset=node==27), color=corecol(numcol=1, offset=2), size=5) +
  ggtitle(paste("CDS A-pro R70 H1 horseshoe crab\nsearch tree\n(", nrow(cur_data), " trees)", sep=""))

#print(cur_st_fig)
# Plot tree

####################

cur_scores_fig = ggplot(cur_data, aes(x=rank, y=Total.score)) +
  geom_point(size=2, color=corecol(pal="wilke", numcol=1), alpha=0.33) +
  scale_x_continuous(limits=c(-10,max(cur_data$rank)+10)) +
  xlab("Rank") +
  ylab("GRAMPA score") +
  bartheme()
#print(cds_apro_r70_full_scores_fig)
# Plot scores

####################

cur_figs = plot_grid(cur_st_fig, cur_scores_fig, rel_widths=c(0.8,1))
print(cur_figs)
# Combine tree and score figs
### Lowest scoring trees

cur_data_lowest = head(cur_data, 6)
# Get the lowest scoring trees

tree_fig_list = list()
# A list to store tree figures

for(i in 1:nrow(cur_data_lowest)){
  tree = read.tree(text=cur_data_lowest[i,]$Tree.string)
  # Read the current tree
  
  tree_fig = ggtree(tree, size=0.5, ladderize=T) +
  xlim(0, 14) +
    geom_tiplab(size=2.5) +
  ggtitle(paste(cur_data_lowest[i,]$Tree, " (", cur_data_lowest[i,]$Total.score, ")", sep=""))
  # Plot the current tree
  
  tree_fig_list[[i]] = tree_fig
  # Add the tree to the tree list
}

tree_figs = plot_grid(plotlist=tree_fig_list, ncol=3)
print(tree_figs)
# Combine and render figs

# cur_data_lowest$run = "CDS A-pro R70 full"
# combined_scores = rbind(combined_scores, cur_data_lowest)

2 Monophyletic Arachnid species tree

2.1 Full search

2.1.1 Input species tree and score distribution

cur_data = read.csv(trad_full_file, comment.char="#", header=T, sep="\t")
# Read the GRAMPA results

cur_data$Tree.string = paste(cur_data$Tree.string, ";", sep="")
# Add the semi-colons to the trees so they can be read by R

cur_data = cur_data[order(cur_data$Total.score), ]
# Re-order the rows by score

cur_data$rank = seq_along(cur_data[,1])
# Add a column for rank (just row number in re-ordered rows)

#cds_apro_r70_full$rank = as.numeric(as.character(cds_apro_r70_full$rank))
# Convert ranks to numeric

####################

st_string = cur_data$Tree.string[cur_data$Tree=="ST"]
# Extract the species tree string

st = read.tree(text=st_string)
# Read the species tree

famcol = corecol(pal="wilke", numcol=1, offset=2)
famoffset = 4
# Order labels

cur_st_fig = ggtree(st, size=0.8, ladderize=T) +
  xlim(0, 20) +
  geom_tiplab(size=4) +
  #geom_text(aes(label=node), hjust=-.3, vjust=-.3, color="#ff6db6") +
  
  #geom_point2(aes(subset=node==27), color=corecol(numcol=1, offset=2), size=5) +
  
  geom_cladelabel(node=22, label="Araneae", align=T, color=corecol(numcol=1, offset=2), offset=famoffset) +
  geom_cladelabel(node=7, label="Scorpiones", align=T, color=corecol(numcol=1), offset=famoffset) +
  geom_cladelabel(node=30, label="Xiphosura", align=T, color=corecol(numcol=1, offset=4), offset=famoffset) +
  geom_cladelabel(node=29, label="Acariformes", align=T, color=corecol(numcol=1, offset=7), offset=famoffset) +
  geom_cladelabel(node=27, label="Parasitiformes", align=T, color=corecol(numcol=1, offset=1, pal="trek"), offset=famoffset) +
  geom_cladelabel(node=15, label="Diptera", align=T, color="#333333", offset=famoffset) +
  geom_cladelabel(node=16, label="Lepidoptera", align=T, color="#333333", offset=famoffset) +
  
  ggtitle(paste("Monophyletic Arachnid search tree\n(", nrow(cur_data), " trees)", sep=""))

#print(cur_st_fig)
# Plot tree

####################

cur_scores_fig = ggplot(cur_data, aes(x=rank, y=Total.score)) +
  geom_point(size=2, color=corecol(pal="wilke", numcol=1), alpha=0.33) +
  scale_x_continuous(limits=c(-1,max(cur_data$rank)+1)) +
  xlab("Rank") +
  ylab("GRAMPA score") +
  bartheme()
#print(cds_apro_r70_full_scores_fig)
# Plot scores

####################

cur_figs = plot_grid(cur_st_fig, cur_scores_fig, rel_widths=c(0.8,1))
print(cur_figs)

# Combine tree and score figs

2.2 Lowest scoring trees

cur_data_lowest = head(cur_data, 6)
# Get the lowest scoring trees

tree_fig_list = list()
# A list to store tree figures

for(i in 1:nrow(cur_data_lowest)){
  tree = read.tree(text=cur_data_lowest[i,]$Tree.string)
  # Read the current tree
  
  tree_fig = ggtree(tree, size=0.5, ladderize=T) +
  xlim(0, 14) +
    geom_tiplab(size=2.5) +
  ggtitle(paste(cur_data_lowest[i,]$Tree, " (", cur_data_lowest[i,]$Total.score, ")", sep=""))
  # Plot the current tree
  
  tree_fig_list[[i]] = tree_fig
  # Add the tree to the tree list
}

tree_figs = plot_grid(plotlist=tree_fig_list, ncol=3)
print(tree_figs)

# Combine and render figs

cur_data_lowest$run = "Monophyletic Arachnid full"
combined_scores = rbind(combined_scores, cur_data_lowest)

2.3 Full search, bootstrap rearrange 70

2.3.1 Input species tree and score distribution

cur_data = read.csv(trad_r70_full_file, comment.char="#", header=T, sep="\t")
# Read the GRAMPA results

cur_data$Tree.string = paste(cur_data$Tree.string, ";", sep="")
# Add the semi-colons to the trees so they can be read by R

cur_data = cur_data[order(cur_data$Total.score), ]
# Re-order the rows by score

cur_data$rank = seq_along(cur_data[,1])
# Add a column for rank (just row number in re-ordered rows)

#cds_apro_r70_full$rank = as.numeric(as.character(cds_apro_r70_full$rank))
# Convert ranks to numeric

####################

st_string = cur_data$Tree.string[cur_data$Tree=="ST"]
# Extract the species tree string

st = read.tree(text=st_string)
# Read the species tree

famcol = corecol(pal="wilke", numcol=1, offset=2)
famoffset = 4
# Order labels

cur_st_fig = ggtree(st, size=0.8, ladderize=T) +
  xlim(0, 20) +
  geom_tiplab(size=4) +
  #geom_text(aes(label=node), hjust=-.3, vjust=-.3, color="#ff6db6") +
  #geom_point2(aes(subset=node==27), color=corecol(numcol=1, offset=2), size=5) +
  
  geom_cladelabel(node=22, label="Araneae", align=T, color=corecol(numcol=1, offset=2), offset=famoffset) +
  geom_cladelabel(node=7, label="Scorpiones", align=T, color=corecol(numcol=1), offset=famoffset) +
  geom_cladelabel(node=30, label="Xiphosura", align=T, color=corecol(numcol=1, offset=4), offset=famoffset) +
  geom_cladelabel(node=29, label="Acariformes", align=T, color=corecol(numcol=1, offset=7), offset=famoffset) +
  geom_cladelabel(node=27, label="Parasitiformes", align=T, color=corecol(numcol=1, offset=1, pal="trek"), offset=famoffset) +
  geom_cladelabel(node=15, label="Diptera", align=T, color="#333333", offset=famoffset) +
  geom_cladelabel(node=16, label="Lepidoptera", align=T, color="#333333", offset=famoffset) +
  
  ggtitle(paste("Monophyletic Arachnid R70 search tree\n(", nrow(cur_data), " trees)", sep=""))

#print(cur_st_fig)
# Plot tree

####################

cur_scores_fig = ggplot(cur_data, aes(x=rank, y=Total.score)) +
  geom_point(size=2, color=corecol(pal="wilke", numcol=1), alpha=0.33) +
  scale_x_continuous(limits=c(-10,max(cur_data$rank)+10)) +
  xlab("Rank") +
  ylab("GRAMPA score") +
  bartheme()
#print(cds_apro_r70_full_scores_fig)
# Plot scores

####################

cur_figs = plot_grid(cur_st_fig, cur_scores_fig, rel_widths=c(0.8,1))
print(cur_figs)

# Combine tree and score figs

2.3.2 Lowest scoring trees

cur_data_lowest = head(cur_data, 6)
# Get the lowest scoring trees

tree_fig_list = list()
# A list to store tree figures

for(i in 1:nrow(cur_data_lowest)){
  tree = read.tree(text=cur_data_lowest[i,]$Tree.string)
  # Read the current tree
  
  tree_fig = ggtree(tree, size=0.5, ladderize=T) +
  xlim(0, 14) +
    geom_tiplab(size=2.5) +
  ggtitle(paste(cur_data_lowest[i,]$Tree, " (", cur_data_lowest[i,]$Total.score, ")", sep=""))
  # Plot the current tree
  
  tree_fig_list[[i]] = tree_fig
  # Add the tree to the tree list
}

tree_figs = plot_grid(plotlist=tree_fig_list, ncol=3)
print(tree_figs)

# Combine and render figs

cur_data_lowest$run = "Monophyletic Arachnid R70 full"
combined_scores = rbind(combined_scores, cur_data_lowest)

2.4 Full search, bootstrap rearrange 90

2.4.1 Input species tree and score distribution

cur_data = read.csv(trad_r90_full_file, comment.char="#", header=T, sep="\t")
# Read the GRAMPA results

cur_data$Tree.string = paste(cur_data$Tree.string, ";", sep="")
# Add the semi-colons to the trees so they can be read by R

cur_data = cur_data[order(cur_data$Total.score), ]
# Re-order the rows by score

cur_data$rank = seq_along(cur_data[,1])
# Add a column for rank (just row number in re-ordered rows)

#cds_apro_r70_full$rank = as.numeric(as.character(cds_apro_r70_full$rank))
# Convert ranks to numeric

####################

st_string = cur_data$Tree.string[cur_data$Tree=="ST"]
# Extract the species tree string

st = read.tree(text=st_string)
# Read the species tree

famcol = corecol(pal="wilke", numcol=1, offset=2)
famoffset = 4
# Order labels

cur_st_fig = ggtree(st, size=0.8, ladderize=T) +
  xlim(0, 20) +
  geom_tiplab(size=4) +
  #geom_text(aes(label=node), hjust=-.3, vjust=-.3, color="#ff6db6") +
  #geom_point2(aes(subset=node==27), color=corecol(numcol=1, offset=2), size=5) +
  
  geom_cladelabel(node=22, label="Araneae", align=T, color=corecol(numcol=1, offset=2), offset=famoffset) +
  geom_cladelabel(node=7, label="Scorpiones", align=T, color=corecol(numcol=1), offset=famoffset) +
  geom_cladelabel(node=30, label="Xiphosura", align=T, color=corecol(numcol=1, offset=4), offset=famoffset) +
  geom_cladelabel(node=29, label="Acariformes", align=T, color=corecol(numcol=1, offset=7), offset=famoffset) +
  geom_cladelabel(node=27, label="Parasitiformes", align=T, color=corecol(numcol=1, offset=1, pal="trek"), offset=famoffset) +
  geom_cladelabel(node=15, label="Diptera", align=T, color="#333333", offset=famoffset) +
  geom_cladelabel(node=16, label="Lepidoptera", align=T, color="#333333", offset=famoffset) +
  
  ggtitle(paste("Monophyletic Arachnid R90 search tree\n(", nrow(cur_data), " trees)", sep=""))

#print(cur_st_fig)
# Plot tree

####################

cur_scores_fig = ggplot(cur_data, aes(x=rank, y=Total.score)) +
  geom_point(size=2, color=corecol(pal="wilke", numcol=1), alpha=0.33) +
  scale_x_continuous(limits=c(-10,max(cur_data$rank)+10)) +
  xlab("Rank") +
  ylab("GRAMPA score") +
  bartheme()
#print(cds_apro_r70_full_scores_fig)
# Plot scores

####################

cur_figs = plot_grid(cur_st_fig, cur_scores_fig, rel_widths=c(0.8,1))
print(cur_figs)

# Combine tree and score figs

2.5 Lowest scoring trees

cur_data_lowest = head(cur_data, 6)
# Get the lowest scoring trees

tree_fig_list = list()
# A list to store tree figures

for(i in 1:nrow(cur_data_lowest)){
  tree = read.tree(text=cur_data_lowest[i,]$Tree.string)
  # Read the current tree
  
  tree_fig = ggtree(tree, size=0.5, ladderize=T) +
  xlim(0, 14) +
    geom_tiplab(size=2.5) +
  ggtitle(paste(cur_data_lowest[i,]$Tree, " (", cur_data_lowest[i,]$Total.score, ")", sep=""))
  # Plot the current tree
  
  tree_fig_list[[i]] = tree_fig
  # Add the tree to the tree list
}

tree_figs = plot_grid(plotlist=tree_fig_list, ncol=3)
print(tree_figs)

# Combine and render figs

cur_data_lowest$run = "Monophyletic Arachnid R90 full"
combined_scores = rbind(combined_scores, cur_data_lowest)

3 Ballesteros species tree

3.1 Full search

3.1.1 Input species tree and score distribution

cur_data = read.csv(ball_full_file, comment.char="#", header=T, sep="\t")
# Read the GRAMPA results

cur_data$Tree.string = paste(cur_data$Tree.string, ";", sep="")
# Add the semi-colons to the trees so they can be read by R

cur_data = cur_data[order(cur_data$Total.score), ]
# Re-order the rows by score

cur_data$rank = seq_along(cur_data[,1])
# Add a column for rank (just row number in re-ordered rows)

#cds_apro_r70_full$rank = as.numeric(as.character(cds_apro_r70_full$rank))
# Convert ranks to numeric

####################

st_string = cur_data$Tree.string[cur_data$Tree=="ST"]
# Extract the species tree string

st = read.tree(text=st_string)
# Read the species tree

famcol = corecol(pal="wilke", numcol=1, offset=2)
famoffset = 4
# Order labels

cur_st_fig = ggtree(st, size=0.8, ladderize=T) +
  xlim(0, 20) +
  geom_tiplab(size=4) +
  #geom_text(aes(label=node), hjust=-.3, vjust=-.3, color="#ff6db6") +
  
  #geom_point2(aes(subset=node==27), color=corecol(numcol=1, offset=2), size=5) +
  
  geom_cladelabel(node=22, label="Araneae", align=T, color=corecol(numcol=1, offset=2), offset=famoffset) +
  geom_cladelabel(node=7, label="Scorpiones", align=T, color=corecol(numcol=1), offset=famoffset) +
  geom_cladelabel(node=27, label="Xiphosura", align=T, color=corecol(numcol=1, offset=4), offset=famoffset) +
  geom_cladelabel(node=30, label="Acariformes", align=T, color=corecol(numcol=1, offset=7), offset=famoffset) +
  geom_cladelabel(node=29, label="Parasitiformes", align=T, color=corecol(numcol=1, offset=1, pal="trek"), offset=famoffset) +
  geom_cladelabel(node=15, label="Diptera", align=T, color="#333333", offset=famoffset) +
  geom_cladelabel(node=16, label="Lepidoptera", align=T, color="#333333", offset=famoffset) +
  
  ggtitle(paste("Ballesteros full search tree\n(", nrow(cur_data), " trees)", sep=""))

#print(cur_st_fig)
# Plot tree

####################

cur_scores_fig = ggplot(cur_data, aes(x=rank, y=Total.score)) +
  geom_point(size=2, color=corecol(pal="wilke", numcol=1), alpha=0.33) +
  scale_x_continuous(limits=c(-1,max(cur_data$rank)+1)) +
  xlab("Rank") +
  ylab("GRAMPA score") +
  bartheme()
#print(cds_apro_r70_full_scores_fig)
# Plot scores

####################

cur_figs = plot_grid(cur_st_fig, cur_scores_fig, rel_widths=c(0.8,1))
print(cur_figs)

# Combine tree and score figs

3.1.2 Lowest scoring trees

cur_data_lowest = head(cur_data, 6)
# Get the lowest scoring trees

tree_fig_list = list()
# A list to store tree figures

for(i in 1:nrow(cur_data_lowest)){
  tree = read.tree(text=cur_data_lowest[i,]$Tree.string)
  # Read the current tree
  
  tree_fig = ggtree(tree, size=0.5, ladderize=T) +
  xlim(0, 14) +
    geom_tiplab(size=2.5) +
  ggtitle(paste(cur_data_lowest[i,]$Tree, " (", cur_data_lowest[i,]$Total.score, ")", sep=""))
  # Plot the current tree
  
  tree_fig_list[[i]] = tree_fig
  # Add the tree to the tree list
}

tree_figs = plot_grid(plotlist=tree_fig_list, ncol=3)
print(tree_figs)

# Combine and render figs

cur_data_lowest$run = "Ballesteros full"
combined_scores = rbind(combined_scores, cur_data_lowest)

3.2 Full search, bootstrap rearrange 70

3.2.1 Input species tree and score distribution

cur_data = read.csv(ball_r70_full_file, comment.char="#", header=T, sep="\t")
# Read the GRAMPA results

cur_data$Tree.string = paste(cur_data$Tree.string, ";", sep="")
# Add the semi-colons to the trees so they can be read by R

cur_data = cur_data[order(cur_data$Total.score), ]
# Re-order the rows by score

cur_data$rank = seq_along(cur_data[,1])
# Add a column for rank (just row number in re-ordered rows)

#cds_apro_r70_full$rank = as.numeric(as.character(cds_apro_r70_full$rank))
# Convert ranks to numeric

####################

st_string = cur_data$Tree.string[cur_data$Tree=="ST"]
# Extract the species tree string

st = read.tree(text=st_string)
# Read the species tree

famcol = corecol(pal="wilke", numcol=1, offset=2)
famoffset = 4
# Order labels

cur_st_fig = ggtree(st, size=0.8, ladderize=T) +
  xlim(0, 20) +
  geom_tiplab(size=4) +
  #geom_text(aes(label=node), hjust=-.3, vjust=-.3, color="#ff6db6") +
  #geom_point2(aes(subset=node==27), color=corecol(numcol=1, offset=2), size=5) +
  
  geom_cladelabel(node=22, label="Araneae", align=T, color=corecol(numcol=1, offset=2), offset=famoffset) +
  geom_cladelabel(node=7, label="Scorpiones", align=T, color=corecol(numcol=1), offset=famoffset) +
  geom_cladelabel(node=27, label="Xiphosura", align=T, color=corecol(numcol=1, offset=4), offset=famoffset) +
  geom_cladelabel(node=30, label="Acariformes", align=T, color=corecol(numcol=1, offset=7), offset=famoffset) +
  geom_cladelabel(node=29, label="Parasitiformes", align=T, color=corecol(numcol=1, offset=1, pal="trek"), offset=famoffset) +
  geom_cladelabel(node=15, label="Diptera", align=T, color="#333333", offset=famoffset) +
  geom_cladelabel(node=16, label="Lepidoptera", align=T, color="#333333", offset=famoffset) +
  
  ggtitle(paste("Ballesteros R70 search tree\n(", nrow(cur_data), " trees)", sep=""))

#print(cur_st_fig)
# Plot tree

####################

cur_scores_fig = ggplot(cur_data, aes(x=rank, y=Total.score)) +
  geom_point(size=2, color=corecol(pal="wilke", numcol=1), alpha=0.33) +
  scale_x_continuous(limits=c(-10,max(cur_data$rank)+10)) +
  xlab("Rank") +
  ylab("GRAMPA score") +
  bartheme()
#print(cds_apro_r70_full_scores_fig)
# Plot scores

####################

cur_figs = plot_grid(cur_st_fig, cur_scores_fig, rel_widths=c(0.8,1))
print(cur_figs)

# Combine tree and score figs

3.2.2 Lowest scoring trees

cur_data_lowest = head(cur_data, 6)
# Get the lowest scoring trees

tree_fig_list = list()
# A list to store tree figures

for(i in 1:nrow(cur_data_lowest)){
  tree = read.tree(text=cur_data_lowest[i,]$Tree.string)
  # Read the current tree
  
  tree_fig = ggtree(tree, size=0.5, ladderize=T) +
  xlim(0, 14) +
    geom_tiplab(size=2.5) +
  ggtitle(paste(cur_data_lowest[i,]$Tree, " (", cur_data_lowest[i,]$Total.score, ")", sep=""))
  # Plot the current tree
  
  tree_fig_list[[i]] = tree_fig
  # Add the tree to the tree list
}

tree_figs = plot_grid(plotlist=tree_fig_list, ncol=3)
print(tree_figs)

# Combine and render figs

cur_data_lowest$run = "Ballesteros R70 full"
combined_scores = rbind(combined_scores, cur_data_lowest)

3.3 Full search, bootstrap rearrange 90

3.3.1 Input species tree and score distribution

cur_data = read.csv(ball_r90_full_file, comment.char="#", header=T, sep="\t")
# Read the GRAMPA results

cur_data$Tree.string = paste(cur_data$Tree.string, ";", sep="")
# Add the semi-colons to the trees so they can be read by R

cur_data = cur_data[order(cur_data$Total.score), ]
# Re-order the rows by score

cur_data$rank = seq_along(cur_data[,1])
# Add a column for rank (just row number in re-ordered rows)

#cds_apro_r70_full$rank = as.numeric(as.character(cds_apro_r70_full$rank))
# Convert ranks to numeric

####################

st_string = cur_data$Tree.string[cur_data$Tree=="ST"]
# Extract the species tree string

st = read.tree(text=st_string)
# Read the species tree

famcol = corecol(pal="wilke", numcol=1, offset=2)
famoffset = 4
# Order labels

cur_st_fig = ggtree(st, size=0.8, ladderize=T) +
  xlim(0, 20) +
  geom_tiplab(size=4) +
  #geom_text(aes(label=node), hjust=-.3, vjust=-.3, color="#ff6db6") +
  
  geom_cladelabel(node=22, label="Araneae", align=T, color=corecol(numcol=1, offset=2), offset=famoffset) +
  geom_cladelabel(node=7, label="Scorpiones", align=T, color=corecol(numcol=1), offset=famoffset) +
  geom_cladelabel(node=27, label="Xiphosura", align=T, color=corecol(numcol=1, offset=4), offset=famoffset) +
  geom_cladelabel(node=30, label="Acariformes", align=T, color=corecol(numcol=1, offset=7), offset=famoffset) +
  geom_cladelabel(node=29, label="Parasitiformes", align=T, color=corecol(numcol=1, offset=1, pal="trek"), offset=famoffset) +
  geom_cladelabel(node=15, label="Diptera", align=T, color="#333333", offset=famoffset) +
  geom_cladelabel(node=16, label="Lepidoptera", align=T, color="#333333", offset=famoffset) +
  
  ggtitle(paste("Ballesteros R90 search tree\n(", nrow(cur_data), " trees)", sep=""))

#print(cur_st_fig)
# Plot tree

####################

cur_scores_fig = ggplot(cur_data, aes(x=rank, y=Total.score)) +
  geom_point(size=2, color=corecol(pal="wilke", numcol=1), alpha=0.33) +
  scale_x_continuous(limits=c(-10,max(cur_data$rank)+10)) +
  xlab("Rank") +
  ylab("GRAMPA score") +
  bartheme()
#print(cds_apro_r70_full_scores_fig)
# Plot scores

####################

cur_figs = plot_grid(cur_st_fig, cur_scores_fig, rel_widths=c(0.8,1))
print(cur_figs)

# Combine tree and score figs

3.3.2 Lowest scoring trees

cur_data_lowest = head(cur_data, 6)
# Get the lowest scoring trees

tree_fig_list = list()
# A list to store tree figures

for(i in 1:nrow(cur_data_lowest)){
  tree = read.tree(text=cur_data_lowest[i,]$Tree.string)
  # Read the current tree
  
  tree_fig = ggtree(tree, size=0.5, ladderize=T) +
  xlim(0, 14) +
    geom_tiplab(size=2.5) +
  ggtitle(paste(cur_data_lowest[i,]$Tree, " (", cur_data_lowest[i,]$Total.score, ")", sep=""))
  # Plot the current tree
  
  tree_fig_list[[i]] = tree_fig
  # Add the tree to the tree list
}

tree_figs = plot_grid(plotlist=tree_fig_list, ncol=3)
print(tree_figs)

# Combine and render figs

cur_data_lowest$run = "Ballesteros R90 full"
combined_scores = rbind(combined_scores, cur_data_lowest)

4 Comparing scores with different species trees

Among the three species trees (our CDS ASTRAL-pro tree, a monophyletic Arachnid tree, and the Ballesteros tree), our tree and the Ballesteros tree score roughly equally (green vs. orange lines). In all three cases, bootstrap rearrangement improves scores, and widens the gap between our tree and the Ballesteros tree (yellow vs light blue lines).

scores_fig = ggplot(combined_scores, aes(x=rank, y=Total.score, color=run)) +
  geom_point(size=3) +
  geom_line() +
  scale_color_manual(values=corecol(numcol=9)) +
  xlab("Rank") +
  ylab("GRAMPA score") +
  bartheme()# +
  #theme(legend.position="bottom")
print(scores_fig)